
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
df=pd.read_excel(r'C:\Users\Nicholas Ra/Bank_Personal_Loan_Modelling.xlsx',1)
df.head()
| ID | Age | Experience | Income | ZIP Code | Family | CCAvg | Education | Mortgage | Personal Loan | Securities Account | CD Account | Online | CreditCard | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 25 | 1 | 49 | 91107 | 4 | 1.6 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
| 1 | 2 | 45 | 19 | 34 | 90089 | 3 | 1.5 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2 | 3 | 39 | 15 | 11 | 94720 | 1 | 1.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 4 | 35 | 9 | 100 | 94112 | 1 | 2.7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 5 | 35 | 8 | 45 | 91330 | 4 | 1.0 | 2 | 0 | 0 | 0 | 0 | 0 | 1 |
df.shape
(5000, 14)
df.isnull().sum()
ID 0 Age 0 Experience 0 Income 0 ZIP Code 0 Family 0 CCAvg 0 Education 0 Mortgage 0 Personal Loan 0 Securities Account 0 CD Account 0 Online 0 CreditCard 0 dtype: int64
df.drop(['ID','ZIP Code'],axis=1,inplace=True)
df.columns
Index(['Age', 'Experience', 'Income', 'Family', 'CCAvg', 'Education',
'Mortgage', 'Personal Loan', 'Securities Account', 'CD Account',
'Online', 'CreditCard'],
dtype='object')
import plotly.express as px
fig=px.box(df,y=['Age','Experience','Income','Family','Education'])
fig.show()
df.skew()
Age -0.029341 Experience -0.026325 Income 0.841339 Family 0.155221 CCAvg 1.598457 Education 0.227093 Mortgage 2.104002 Personal Loan 2.743607 Securities Account 2.588268 CD Account 3.691714 Online -0.394785 CreditCard 0.904589 dtype: float64
df.dtypes
Age int64 Experience int64 Income int64 Family int64 CCAvg float64 Education int64 Mortgage int64 Personal Loan int64 Securities Account int64 CD Account int64 Online int64 CreditCard int64 dtype: object
df.hist(figsize=(20,20))
array([[<AxesSubplot:title={'center':'Age'}>,
<AxesSubplot:title={'center':'Experience'}>,
<AxesSubplot:title={'center':'Income'}>],
[<AxesSubplot:title={'center':'Family'}>,
<AxesSubplot:title={'center':'CCAvg'}>,
<AxesSubplot:title={'center':'Education'}>],
[<AxesSubplot:title={'center':'Mortgage'}>,
<AxesSubplot:title={'center':'Personal Loan'}>,
<AxesSubplot:title={'center':'Securities Account'}>],
[<AxesSubplot:title={'center':'CD Account'}>,
<AxesSubplot:title={'center':'Online'}>,
<AxesSubplot:title={'center':'CreditCard'}>]], dtype=object)
import seaborn as sns
sns.displot(df['Experience'])
<seaborn.axisgrid.FacetGrid at 0x1f3e1080190>
df['Experience'].mean()
20.1046
Negative_exp=df[df['Experience']<0]
Negative_exp.head()
| Age | Experience | Income | Family | CCAvg | Education | Mortgage | Personal Loan | Securities Account | CD Account | Online | CreditCard | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 89 | 25 | -1 | 113 | 4 | 2.30 | 3 | 0 | 0 | 0 | 0 | 0 | 1 |
| 226 | 24 | -1 | 39 | 2 | 1.70 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 315 | 24 | -2 | 51 | 3 | 0.30 | 3 | 0 | 0 | 0 | 0 | 1 | 0 |
| 451 | 28 | -2 | 48 | 2 | 1.75 | 3 | 89 | 0 | 0 | 0 | 1 | 0 |
| 524 | 24 | -1 | 75 | 4 | 0.20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
sns.displot(Negative_exp['Age'])
<seaborn.axisgrid.FacetGrid at 0x1f3e14b0ac0>
Negative_exp['Experience'].mean()
-1.4423076923076923
Negative_exp.size
624
print('There are {} records which has negative values for experience, approx {} %'.format(Negative_exp.size , ((Negative_exp.size/df.size)*100)))
There are 624 records which has negative values for experience, approx 1.04 %
data=df.copy()
data.head()
| Age | Experience | Income | Family | CCAvg | Education | Mortgage | Personal Loan | Securities Account | CD Account | Online | CreditCard | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 25 | 1 | 49 | 4 | 1.6 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
| 1 | 45 | 19 | 34 | 3 | 1.5 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2 | 39 | 15 | 11 | 1 | 1.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 35 | 9 | 100 | 1 | 2.7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 35 | 8 | 45 | 4 | 1.0 | 2 | 0 | 0 | 0 | 0 | 0 | 1 |
np.where(data['Experience']<0,data['Experience'].mean(),data['Experience'])
array([ 1., 19., 15., ..., 39., 40., 4.])
data[data['Experience']<0]
| Age | Experience | Income | Family | CCAvg | Education | Mortgage | Personal Loan | Securities Account | CD Account | Online | CreditCard | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 89 | 25 | -1 | 113 | 4 | 2.300000 | 3 | 0 | 0 | 0 | 0 | 0 | 1 |
| 226 | 24 | -1 | 39 | 2 | 1.700000 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 315 | 24 | -2 | 51 | 3 | 0.300000 | 3 | 0 | 0 | 0 | 0 | 1 | 0 |
| 451 | 28 | -2 | 48 | 2 | 1.750000 | 3 | 89 | 0 | 0 | 0 | 1 | 0 |
| 524 | 24 | -1 | 75 | 4 | 0.200000 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 536 | 25 | -1 | 43 | 3 | 2.400000 | 2 | 176 | 0 | 0 | 0 | 1 | 0 |
| 540 | 25 | -1 | 109 | 4 | 2.300000 | 3 | 314 | 0 | 0 | 0 | 1 | 0 |
| 576 | 25 | -1 | 48 | 3 | 0.300000 | 3 | 0 | 0 | 0 | 0 | 0 | 1 |
| 583 | 24 | -1 | 38 | 2 | 1.700000 | 2 | 0 | 0 | 0 | 0 | 1 | 0 |
| 597 | 24 | -2 | 125 | 2 | 7.200000 | 1 | 0 | 0 | 1 | 0 | 0 | 1 |
| 649 | 25 | -1 | 82 | 4 | 2.100000 | 3 | 0 | 0 | 0 | 0 | 1 | 0 |
| 670 | 23 | -1 | 61 | 4 | 2.600000 | 1 | 239 | 0 | 0 | 0 | 1 | 0 |
| 686 | 24 | -1 | 38 | 4 | 0.600000 | 2 | 0 | 0 | 0 | 0 | 1 | 0 |
| 793 | 24 | -2 | 150 | 2 | 2.000000 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 889 | 24 | -2 | 82 | 2 | 1.600000 | 3 | 0 | 0 | 0 | 0 | 1 | 1 |
| 909 | 23 | -1 | 149 | 1 | 6.333333 | 1 | 305 | 0 | 0 | 0 | 0 | 1 |
| 1173 | 24 | -1 | 35 | 2 | 1.700000 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1428 | 25 | -1 | 21 | 4 | 0.400000 | 1 | 90 | 0 | 0 | 0 | 1 | 0 |
| 1522 | 25 | -1 | 101 | 4 | 2.300000 | 3 | 256 | 0 | 0 | 0 | 0 | 1 |
| 1905 | 25 | -1 | 112 | 2 | 2.000000 | 1 | 241 | 0 | 0 | 0 | 1 | 0 |
| 2102 | 25 | -1 | 81 | 2 | 1.600000 | 3 | 0 | 0 | 0 | 0 | 1 | 1 |
| 2430 | 23 | -1 | 73 | 4 | 2.600000 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2466 | 24 | -2 | 80 | 2 | 1.600000 | 3 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2545 | 25 | -1 | 39 | 3 | 2.400000 | 2 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2618 | 23 | -3 | 55 | 3 | 2.400000 | 2 | 145 | 0 | 0 | 0 | 1 | 0 |
| 2717 | 23 | -2 | 45 | 4 | 0.600000 | 2 | 0 | 0 | 0 | 0 | 1 | 1 |
| 2848 | 24 | -1 | 78 | 2 | 1.800000 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2876 | 24 | -2 | 80 | 2 | 1.600000 | 3 | 238 | 0 | 0 | 0 | 0 | 0 |
| 2962 | 23 | -2 | 81 | 2 | 1.800000 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2980 | 25 | -1 | 53 | 3 | 2.400000 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3076 | 29 | -1 | 62 | 2 | 1.750000 | 3 | 0 | 0 | 0 | 0 | 0 | 1 |
| 3130 | 23 | -2 | 82 | 2 | 1.800000 | 2 | 0 | 0 | 1 | 0 | 0 | 1 |
| 3157 | 23 | -1 | 13 | 4 | 1.000000 | 1 | 84 | 0 | 0 | 0 | 1 | 0 |
| 3279 | 26 | -1 | 44 | 1 | 2.000000 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3284 | 25 | -1 | 101 | 4 | 2.100000 | 3 | 0 | 0 | 0 | 0 | 0 | 1 |
| 3292 | 25 | -1 | 13 | 4 | 0.400000 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
| 3394 | 25 | -1 | 113 | 4 | 2.100000 | 3 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3425 | 23 | -1 | 12 | 4 | 1.000000 | 1 | 90 | 0 | 0 | 0 | 1 | 0 |
| 3626 | 24 | -3 | 28 | 4 | 1.000000 | 3 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3796 | 24 | -2 | 50 | 3 | 2.400000 | 2 | 0 | 0 | 1 | 0 | 0 | 0 |
| 3824 | 23 | -1 | 12 | 4 | 1.000000 | 1 | 0 | 0 | 1 | 0 | 0 | 1 |
| 3887 | 24 | -2 | 118 | 2 | 7.200000 | 1 | 0 | 0 | 1 | 0 | 1 | 0 |
| 3946 | 25 | -1 | 40 | 3 | 2.400000 | 2 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4015 | 25 | -1 | 139 | 2 | 2.000000 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| 4088 | 29 | -1 | 71 | 2 | 1.750000 | 3 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4116 | 24 | -2 | 135 | 2 | 7.200000 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4285 | 23 | -3 | 149 | 2 | 7.200000 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4411 | 23 | -2 | 75 | 2 | 1.800000 | 2 | 0 | 0 | 0 | 0 | 1 | 1 |
| 4481 | 25 | -2 | 35 | 4 | 1.000000 | 3 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4514 | 24 | -3 | 41 | 4 | 1.000000 | 3 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4582 | 25 | -1 | 69 | 3 | 0.300000 | 3 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4957 | 29 | -1 | 50 | 2 | 1.750000 | 3 | 0 | 0 | 0 | 0 | 0 | 1 |
sns.heatmap(df.corr())
sns.heatmap(df.corr(),annot=True)
<AxesSubplot:>
data=data.drop(['Experience'],axis=1)
data.head()
| Age | Income | Family | CCAvg | Education | Mortgage | Personal Loan | Securities Account | CD Account | Online | CreditCard | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 25 | 49 | 4 | 1.6 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
| 1 | 45 | 34 | 3 | 1.5 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
| 2 | 39 | 11 | 1 | 1.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 35 | 100 | 1 | 2.7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 35 | 45 | 4 | 1.0 | 2 | 0 | 0 | 0 | 0 | 0 | 1 |
data['Education'].unique()
array([1, 2, 3], dtype=int64)
def mark(x):
if x==1:
return 'Undergrad'
elif x==2:
return 'Graduate'
else:
return 'Advanced/Professional'
data['Edu_mark']=data['Education'].apply(mark)
data.head()
| Age | Income | Family | CCAvg | Education | Mortgage | Personal Loan | Securities Account | CD Account | Online | CreditCard | Edu_mark | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 25 | 49 | 4 | 1.6 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | Undergrad |
| 1 | 45 | 34 | 3 | 1.5 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | Undergrad |
| 2 | 39 | 11 | 1 | 1.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | Undergrad |
| 3 | 35 | 100 | 1 | 2.7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | Graduate |
| 4 | 35 | 45 | 4 | 1.0 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | Graduate |
EDU_dis=data.groupby('Edu_mark')['Age'].count()
EDU_dis
Edu_mark Advanced/Professional 1501 Graduate 1403 Undergrad 2096 Name: Age, dtype: int64
fig=px.pie(data,values=EDU_dis, names=EDU_dis.index,title='Pie Chart')
fig.show()
data.columns
Index(['Age', 'Income', 'Family', 'CCAvg', 'Education', 'Mortgage',
'Personal Loan', 'Securities Account', 'CD Account', 'Online',
'CreditCard', 'Edu_mark'],
dtype='object')
def Security_CD(row):
if(row['Securities Account']==1) & (row['CD Account']==1):
return 'Holds Securities & Deposit'
elif (row['Securities Account']==0) & (row['CD Account']==0):
return 'holds Securities & Deposit'
elif (row['Securities Account']==1) & (row['CD Account']==0):
return 'holds Securities & Deposit'
elif (row['Securities Account']==0) & (row['CD Account']==1):
return 'holds Securities & Deposit'
data['Account_holder_category']=data.apply(Security_CD,axis=1)
data.head()
| Age | Income | Family | CCAvg | Education | Mortgage | Personal Loan | Securities Account | CD Account | Online | CreditCard | Edu_mark | Account_holder_category | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 25 | 49 | 4 | 1.6 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | Undergrad | holds Securities & Deposit |
| 1 | 45 | 34 | 3 | 1.5 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | Undergrad | holds Securities & Deposit |
| 2 | 39 | 11 | 1 | 1.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | Undergrad | holds Securities & Deposit |
| 3 | 35 | 100 | 1 | 2.7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | Graduate | holds Securities & Deposit |
| 4 | 35 | 45 | 4 | 1.0 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | Graduate | holds Securities & Deposit |
values=data['Account_holder_category'].value_counts()
values.index
Index(['holds Securities & Deposit', 'Holds Securities & Deposit'], dtype='object')
fig=px.pie(data,values=values,names=values.index,title='Pie_chart')
fig.show()
data.columns
Index(['Age', 'Income', 'Family', 'CCAvg', 'Education', 'Mortgage',
'Personal Loan', 'Securities Account', 'CD Account', 'Online',
'CreditCard', 'Edu_mark', 'Account_holder_category'],
dtype='object')
px.box(data,x='Education',y='Income',facet_col='Personal Loan')
sns.displot(data[data['Personal Loan']==0]['Income'])
<seaborn.axisgrid.FacetGrid at 0x25ace300a90>
sns.displot(data[data['Personal Loan']==0]['Income'])
sns.displot(data[data['Personal Loan']==1]['Income'])
<seaborn.axisgrid.FacetGrid at 0x25ace36b790>
data.columns
Index(['Age', 'Income', 'Family', 'CCAvg', 'Education', 'Mortgage',
'Personal Loan', 'Securities Account', 'CD Account', 'Online',
'CreditCard', 'Edu_mark', 'Account_holder_category'],
dtype='object')
col_name=['Securities Account','Online','Account_holder_category','CreditCard']
for i in col_name:
plt.figure(figsize=(10,5))
sns.countplot(x=i,hue='Personal Loan',data=data)
sns.scatterplot(data['Age'],data['Personal Loan'],hue=data['Family'])
C:\Anaconda3\envs\nikoenv\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
<AxesSubplot:xlabel='Age', ylabel='Personal Loan'>
import scipy.stats as stats
Ho='Age does not have impact on availing personal loan'
Ha='Age does not impact on availing personal loan'
Age_no=np.array(data[data['Personal Loan']==0]['Age'])
Age_yes=np.array(data[data['Personal Loan']==1]['Age'])
t,p_value=stats.ttest_ind(Age_no,Age_yes,axis=0)
if p_value<0.05:
print(Ha,'as the p_value is less than 0.05 with a value of {}'.format(p_value))
else:
print(Ho,'as the p_value is greater than 0.05 with a value of {}'.format(p_value))
Age does not have impact on availing personal loan as the p_value is greater than 0.05 with a value of 0.584959263705325
def Hypothesis(col1,col2,Ho,Ha):
arr1=np.array(data[data[col1]==0][col2])
arr2=np.array(data[data[col1]==1][col2])
t,p_value=stats.ttest_ind(arr1,arr2,axis=0)
if p_value<0.05:
print('{}, as the p_value is less than 0.05 with a value of {}'.format(Ha,p_value))
else:
print('{}, as the p_value is greater than 0.05 with a value of {}'.format(Ho,p_value))
Hypothesis('Personal Loan','Age',Ho='Age does not have impact on availing personal loan',Ha='Age does have an impact on availing personal loan')
Age does not have impact on availing personal loan, as the p_value is greater than 0.05 with a value of 0.584959263705325
Hypothesis(col1='Personal Loan',col2='Income',Ho='Income does not have impact on availing personal loan',Ha='Income does have an impact on availing personal loan')
Income does have an impact on availing personal loan, as the p_value is less than 0.05 with a value of 0.0
Hypothesis('Personal Loan','Family',Ho='Family does not have impact on availing personal loan',Ha='Family does have an impact on availing personal loan')
Family does have an impact on availing personal loan, as the p_value is less than 0.05 with a value of 1.4099040685673807e-05